The filters applied to the parcel and crime data are:
# Get crime data from ArcGIS API and remove (4) entries with missing geometry.
# Write cleaned data to GeoJSON
# crime_dg <- st_read("https://utility.arcgis.com/usrsvcs/servers/0eaa6be357a74f5280157125e9b547fc/rest/services/OpenData_PublicSafety/FeatureServer/2/query?outFields=*&where=1%3D1&f=geojson")
# crime_dg$Date <- as.Date(fread(".//data//crime_dg.csv")$Date, "%Y/%m/%d")
# crime_dg <- crime_dg[!st_is_empty(crime_dg),]
# st_write(crime_dg, ".//data//crime_dg.geojson")
# # Read in the Hartford crime and parcel data
# crime_dg <- st_read(".//data//crime_dg.geojson")
# parcel_dg <- st_read(".//data//parcel_hartford.geojson")
# # Filter crime data to 2016-2021 and parcel data to single family homes
# crime_dg <- crime_dg |>
# subset(year(Date) %in% 2016:2021) |>
# st_transform(st_crs(hartford_tracts))
# parcel_dg <- parcel_dg |>
# subset(State_Use_Description == "ONE FAMILY") |>
# st_transform(st_crs(hartford_tracts))
# Get the census tracts from Tigris for the map baselayer
hartford_census_data <- get_acs(
geography = "tract", variable = c("B19013_001", "B01001_001"),
state = "CT", county = "Hartford", year = 2021
) |>
dplyr::select(GEOID, variable, estimate) |>
tidyr::pivot_wider(names_from = variable, values_from = estimate) |>
dplyr::rename(population = "B01001_001", med_income = "B19013_001")
hartford_tracts <- st_filter(tracts(state = "CT"),
subset(county_subdivisions(state = "CT"),
NAMELSAD == "Hartford town"),
.predicate = st_within)
hartford_tracts <- merge(hartford_tracts, hartford_census_data, by = "GEOID")
water <- st_intersection(area_water("CT", "Hartford"), hartford_tracts)
## Warning: attribute variables are assumed to be spatially constant throughout
## all geometries
# Read in the Hartford crime and parcel data
# Load prepared crime data with kernel density estimates for thefts & violence
# See demo2 for details
crime_dg <- st_read(".//data//crime_hartford_2016_2021.geojson")
## Reading layer `crime_hartford_2016_2021' from data source
## `C:\Users\llint\OneDrive - Yale University\classes\625\CT Property\SDS625\data\crime_hartford_2016_2021.geojson'
## using driver `GeoJSON'
## Simple feature collection with 197060 features and 12 fields
## Geometry type: POINT
## Dimension: XY
## Bounding box: xmin: -72.71865 ymin: 41.72403 xmax: -72.65041 ymax: 41.80719
## Geodetic CRS: WGS 84
parcel_dg <- st_read(".//data//parcel_hartford_single_family_crimestats.geojson")
## Reading layer `parcel_hartford_single_family_crimestats' from data source
## `C:\Users\llint\OneDrive - Yale University\classes\625\CT Property\SDS625\data\parcel_hartford_single_family_crimestats.geojson'
## using driver `GeoJSON'
## Simple feature collection with 7239 features and 51 fields
## Geometry type: MULTIPOLYGON
## Dimension: XY
## Bounding box: xmin: -72.71637 ymin: 41.72374 xmax: -72.65809 ymax: 41.80743
## Geodetic CRS: WGS 84
# Plot
ggplot(crime_dg) +
geom_sf(data = hartford_tracts) +
geom_sf(data = water, color = "blue", linewidth = 0.35) +
geom_hex(aes(X, Y, fill = log10(..count..)),
data = ~cbind(.x, st_coordinates(.x)), alpha = 0.75, bins = 50) +
scale_fill_binned() +
scale_x_continuous(guide = guide_axis(angle = 45)) +
labs(x = "Latitude", y = "Longitude") +
theme_bw()
A sample of the data is plotted. Crime is in red, parcels are in black, and the density of crime is in blue. Additional census tract level information like median income and population are included in the tooltip. Interact with the map for details.
g <- ggplot(dplyr::sample_n(crime_dg, 1e3)) +
geom_sf(data = hartford_tracts, aes(label1 = GEOID,
label2 = med_income,
label3 = population)) +
geom_sf(data = dplyr::sample_n(parcel_dg, 1e3)) +
geom_density_2d(aes(X,Y), data = ~cbind(.x, st_coordinates(.x))) +
stat_sf_coordinates(size = 0.1, color = "red") +
labs(x = "Latitude", y = "Longitude") +
theme_bw() +
theme(axis.text.x = element_text(angle = 45))
p <- toWebGL(ggplotly(g))
p$x$data[[4]]$hoverinfo <- "none"
p
The manually curated variables are:
Price: Assessed total value of the parcelThefts: Number of thefts (robberies, burglaries,
larceny, theft, or stolen property) within 150 meters of the parcelViolence: Number of violent crimes (assaults,
homicides, shootings) within 150 meters of the parcelLiving_Area: Living area of the parcelEffective_Area: Effective area of the parcelYear: Approximate year the parcel was builtBed: Number of bedrooms in the parcelBath: Number of bathrooms in the parcelViolent crime rates are computed as the average number of violent crimes within 150 meters of a parcel in each tract. The average property value is computed as the mean assessed total value of the parcels in the tract.
# Subset the data and rename columns
parcel_dg %<>%
subset(select = c(
"Assessed_Total", "Living_Area", "Effective_Area", "AYB",
"Number_of_Bedroom", "Number_of_Baths", "Condition_Description",
"Violence"))
parcel_dg %<>%
dplyr::rename(
Price = "Assessed_Total", Year = "AYB", Bed = "Number_of_Bedroom",
Bath = "Number_of_Baths", Condition = "Condition_Description")
parcel_dg$Condition %<>%
factor(levels = c("Dilapidated", "Very Poor", "Poor", "Fair", "Fair-Avg",
"Average", "Avg-Good", "Good", "Good-VG", "Very Good",
"Excellent"))
# Drop rows with NAs
paste0("Dropping n=", nrow(parcel_dg) - nrow(na.omit(parcel_dg)),
" rows with NAs.")
## [1] "Dropping n=19 rows with NAs."
X <- na.omit(parcel_dg)
# Assign each parcel to a census tract
X$GEOID <- hartford_tracts$GEOID[st_intersects(
st_transform(X, crs = 26956),
st_transform(hartford_tracts, crs = 26956)) |> sapply(head, 1)]
# Compute rate of violent crimes per person in each tract
# and average property value in each tract
setDT(copy(X))[as.data.table(hartford_tracts),
.(violence_rate = mean(Violence),
avg_property_value = mean(Price)),
on = "GEOID", by = .EACHI]